package com.galeno.sparksql

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{SparkSession, types}
import org.apache.spark.sql.types.{DataTypes, StructField, StructType}

/**
 * @Title: SparkSql03
 * @Description: Demonstrates reading JSON files with explicit schemas in Spark SQL,
 *               including mapping a variable-keyed JSON object to a MapType column.
 * @author galeno
 * @date 2021/9/4 20:46
 */
object SparkSql03 {

  /**
   * Entry point. Demonstrates two ways of reading JSON with Spark SQL:
   *   1. Reading a flat JSON file with an explicitly declared schema.
   *   2. Reading a nested log file where the variable-keyed `properties`
   *      object is declared as a MapType to avoid schema explosion.
   *
   * Side effects: creates a local SparkSession, prints schemas and rows to
   * stdout, and stops the session before returning.
   */
  def main(args: Array[String]): Unit = {
    // Silence Spark's verbose INFO logging so the show() output is readable.
    Logger.getLogger("org.apache").setLevel(Level.WARN)

    val spark = SparkSession.builder()
      .master("local")
      .appName("llll")
      .getOrCreate()

    // Explicit schema for the flat battle file: avoids a schema-inference
    // pass and pins the column types (e.g. id as Int rather than inferred Long).
    val schema: StructType = StructType(Seq(
      StructField("id", DataTypes.IntegerType),
      StructField("name", DataTypes.StringType),
      StructField("role", DataTypes.StringType),
      StructField("energy", DataTypes.LongType)
    ))

    val df1 = spark.read.schema(schema).json("data/battel_json.txt")
    df1.printSchema()
    df1.show(100, false)

    // Complex-JSON parsing example: let Spark infer the nested schema first.
    val df2 = spark.read.json("data/app_log_2021-06-05.log")
    df2.printSchema()
    df2.show(5, false)
    df2.selectExpr("properties.account").show(5, false)

    /**
     * The keys inside `properties` differ from row to row, so schema inference
     * produces one struct field per distinct key and the result is full of
     * redundant null columns.
     *
     * Instead, define a custom schema that declares `properties` as a
     * Map[String, String], collapsing all the variable keys into one column.
     */
    // val, not var: the schema is never reassigned after construction.
    val schema2 = StructType(Seq(
      StructField("account", DataTypes.StringType),
      StructField("appId", DataTypes.StringType),
      StructField("appVersion", DataTypes.StringType),
      StructField("carrier", DataTypes.StringType),
      StructField("deviceId", DataTypes.StringType),
      StructField("deviceType", DataTypes.StringType),
      StructField("eventId", DataTypes.StringType),
      StructField("ip", DataTypes.StringType),
      StructField("latitude", DataTypes.DoubleType),
      StructField("longitude", DataTypes.DoubleType),
      StructField("netType", DataTypes.StringType),
      StructField("osName", DataTypes.StringType),
      StructField("osVersion", DataTypes.StringType),

      // Variable-keyed object -> one map column instead of N sparse struct fields.
      StructField("properties", DataTypes.createMapType(DataTypes.StringType, DataTypes.StringType)),

      StructField("releaseChannel", DataTypes.StringType),
      StructField("resolution", DataTypes.StringType),
      StructField("sessionId", DataTypes.StringType),
      StructField("timeStamp", DataTypes.LongType)
    ))

    val df3 = spark.read.schema(schema2).json("data/app_log_2021-06-05.log")
    df3.printSchema()
    df3.show()
    // Map access uses bracket syntax rather than the struct dot syntax above.
    df3.selectExpr("properties['account']").show()

    spark.stop()
  }

}
